Import the necessary packages (the pipeline below needs dplyr; the map needs ggplot2, usmap, and plotly)

Load the CSV data and check that it loaded

# Read the raw testing data from the working directory, then preview the
# first rows as a quick check that the file was parsed.
df <- read.csv("data.csv")
head(df)
##   fips     lat     long  county province_state         combined_key       date
## 1 1001 32.5395 -86.6441 Autauga        Alabama Autauga, Alabama, US 2021-08-01
## 2 1003 30.7277 -87.7221 Baldwin        Alabama Baldwin, Alabama, US 2021-08-01
## 3 1005 31.8683 -85.3871 Barbour        Alabama Barbour, Alabama, US 2021-08-01
## 4 1007 32.9964 -87.1251    Bibb        Alabama    Bibb, Alabama, US 2021-08-01
## 5 1009 33.9821 -86.5679  Blount        Alabama  Blount, Alabama, US 2021-08-01
## 6 1011 32.1003 -85.7127 Bullock        Alabama Bullock, Alabama, US 2021-08-01
##   tests_combined_total
## 1                29714
## 2               118240
## 3                12471
## 4                14814
## 5                27943
## 6                 6155

Clean up the data

  1. Remove rows whose state name is an empty string
  2. Remove rows that have NA tests
  3. Select the state and test-total columns; all other columns are dropped
  4. Group by state
  5. Take the maximum total per state (the totals appear to be cumulative, so the maximum is the complete total for that state)
  6. Remove states with a total of 0 (a 0 total means no data)
# Collapse the raw rows to one row per state holding that state's largest
# reported test total. Step numbers refer to the list above.
df <- df %>%
  filter(
    province_state != "",          # 1: drop rows with a blank state name
    !is.na(tests_combined_total)   # 2: drop rows with a missing test count
  ) %>%
  select(province_state, tests_combined_total) %>%                 # 3
  group_by(province_state) %>%                                     # 4
  summarise(tests_combined_total = max(tests_combined_total)) %>%  # 5
  filter(tests_combined_total != 0)                                # 6

df
## # A tibble: 33 × 2
##    province_state       tests_combined_total
##    <chr>                               <int>
##  1 Alabama                           1787546
##  2 Alaska                            1680253
##  3 Arizona                          12408935
##  4 Arkansas                           891151
##  5 California                       73014784
##  6 Connecticut                       4273086
##  7 Delaware                          2099374
##  8 District of Columbia              3067883
##  9 Hawaii                            2697248
## 10 Illinois                         11237849
## # … with 23 more rows

Rename columns

# Shorten both column names in a single rename() call (new_name = old_name).
df <- df %>%
  rename(state = province_state, total_tests = tests_combined_total)

df
## # A tibble: 33 × 2
##    state                total_tests
##    <chr>                      <int>
##  1 Alabama                  1787546
##  2 Alaska                   1680253
##  3 Arizona                 12408935
##  4 Arkansas                  891151
##  5 California              73014784
##  6 Connecticut              4273086
##  7 Delaware                 2099374
##  8 District of Columbia     3067883
##  9 Hawaii                   2697248
## 10 Illinois                11237849
## # … with 23 more rows

Map it

library(usmap)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Choropleth of total tests per state: each state is shaded by `total_tests`
# on a white-to-cyan gradient, with grey borders and the legend on the left.
# df has a `state` column and no `fips` column, so plot_usmap() presumably
# keys on the state names -- confirm against the usmap documentation.
# NOTE(review): the legend title says 2020, but the preview of the raw data
# shows 2021 dates -- confirm which period these totals cover.
map <- plot_usmap(data = df, values = "total_tests", color = "grey") +
  scale_fill_continuous(
    low = "white",
    high = "#46e8fa",
    name = "Covid Tests by State (2020)",
    # `labels` is spelled out in full rather than relying on R's partial
    # argument matching of `label`.
    labels = scales::comma
  ) +
  theme(legend.position = "left")

map

Make it interactive

# Build an HTML hover label per state, e.g. "Alabama<br>Total Tests: 1787546".
# paste0() is used because paste()'s default sep = " " would insert stray
# spaces before the "<br>" tag and after the colon.
# NOTE(review): `map` was built before this column was added, and ggplotly()
# is called without a tooltip mapping, so nothing visible here passes `text`
# to the widget -- confirm whether it should be wired in as the tooltip.
df$text <- paste0(df$state, "<br>Total Tests: ", df$total_tests)

# Convert the static ggplot map into an interactive plotly widget and show it.
map_plotly <- ggplotly(map)
map_plotly

Finished map